counts <- Read10X_h5("filtered_feature_bc_matrix.h5")
fragpath <- "atac_fragments.tsv.gz"
# get gene annotations for hg38
annotation <- GetGRangesFromEnsDb(ensdb = EnsDb.Hsapiens.v86)
genome(annotation) <- "hg38"
seqlevelsStyle(annotation) <- "UCSC"
# seqlevelsStyle(annotation) <- "Ensembl"
# create a Seurat object containing the RNA adata
npm1 <- CreateSeuratObject(
counts = counts$`Gene Expression`,
assay = "RNA"
)
# npm1[["percent.mt"]] <- PercentageFeatureSet(npm1, pattern = "^MT-")
#
# # create ATAC assay and add it to the object
#
# npm1[["ATAC"]] <- CreateChromatinAssay(
# counts = counts$Peaks,
# sep = c(":", "-"),
# fragments = fragpath,
# annotation = annotation
#
# )
npm1 <- SCTransform(npm1) %>% RunPCA() %>% RunUMAP(dims = 1:50, reduction.name = 'umap.rna', reduction.key = 'rnaUMAP_')
##
|
| | 0%
|
|================== | 25%
|
|=================================== | 50%
|
|==================================================== | 75%
|
|======================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|=== | 4%
|
|==== | 6%
|
|===== | 8%
|
|======= | 9%
|
|======== | 11%
|
|========= | 13%
|
|=========== | 15%
|
|============ | 17%
|
|============= | 19%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================== | 34%
|
|========================= | 36%
|
|========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|============================================ | 62%
|
|============================================= | 64%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 79%
|
|========================================================= | 81%
|
|========================================================== | 83%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=============================================================== | 91%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|======================================================================| 100%
##
|
| | 0%
|
|= | 2%
|
|=== | 4%
|
|==== | 6%
|
|===== | 8%
|
|======= | 9%
|
|======== | 11%
|
|========= | 13%
|
|=========== | 15%
|
|============ | 17%
|
|============= | 19%
|
|=============== | 21%
|
|================ | 23%
|
|================= | 25%
|
|================== | 26%
|
|==================== | 28%
|
|===================== | 30%
|
|====================== | 32%
|
|======================== | 34%
|
|========================= | 36%
|
|========================== | 38%
|
|============================ | 40%
|
|============================= | 42%
|
|============================== | 43%
|
|================================ | 45%
|
|================================= | 47%
|
|================================== | 49%
|
|==================================== | 51%
|
|===================================== | 53%
|
|====================================== | 55%
|
|======================================== | 57%
|
|========================================= | 58%
|
|========================================== | 60%
|
|============================================ | 62%
|
|============================================= | 64%
|
|============================================== | 66%
|
|================================================ | 68%
|
|================================================= | 70%
|
|================================================== | 72%
|
|==================================================== | 74%
|
|===================================================== | 75%
|
|====================================================== | 77%
|
|======================================================= | 79%
|
|========================================================= | 81%
|
|========================================================== | 83%
|
|=========================================================== | 85%
|
|============================================================= | 87%
|
|============================================================== | 89%
|
|=============================================================== | 91%
|
|================================================================= | 92%
|
|================================================================== | 94%
|
|=================================================================== | 96%
|
|===================================================================== | 98%
|
|======================================================================| 100%
#Add cell labels from the previously generated scRNA annotation
#reference <- readRDS("~/Documents/Research/NPM1-AML/NPM1_seurat/NPM1_seurat.rds")
reference <- readRDS("~/Downloads/namlab/NPM1_seurat/NPM1_seurat.rds")
npm1 <- AddMetaData(
object = npm1,
metadata = reference@meta.data %>% select(Cell.Ident_Mutation.Status) %>% filter(rownames(.) %in% BiocGenerics::intersect(rownames(npm1@meta.data), rownames(reference@meta.data))))
Idents(npm1) <- "Cell.Ident_Mutation.Status"
# Can compare hsc1 wt with hsc2 mut and hsc2 mut vs hsc1 mut
DefaultAssay(npm1) <- "SCT"
stem_cell_markers_1 <- FindMarkers(npm1, ident.1 = "HSC1_WT", ident.2 = "HSC2_MUT", only.pos = FALSE) %>% arrange(desc(avg_log2FC))
stem_cell_markers_2 <- FindMarkers(npm1, ident.1 = "HSC2_MUT", ident.2 = "HSC1_MUT", only.pos = FALSE) %>% arrange(desc(avg_log2FC))
monocytic_markers <- FindMarkers(npm1, ident.1 = "Monocytic_MUT", ident.2 = "Monocytic_WT", only.pos = FALSE) %>% arrange(desc(avg_log2FC))
gostres <- gost(query = rownames(stem_cell_markers_1), organism = "hsapiens")
gostplot(gostres, capped = FALSE, interactive = TRUE)
original_gene_list <- stem_cell_markers_1$avg_log2FC
names(original_gene_list) <- rownames(stem_cell_markers_1)
gene_list<-na.omit(original_gene_list)
gene_list = sort(gene_list, decreasing = TRUE)
#de_mono = monocytic_markers
de_hsc = stem_cell_markers_1
# de_hsc = monocytic_markers
Name2 = "HSC1_WT vs. HSC2_MUT"
de_hsc$threshold <- as.factor(ifelse(de_hsc$p_val_adj < 0.05 & abs(de_hsc$avg_log2FC) >= 0.25, ifelse(de_hsc$avg_log2FC> 0.25 ,'Up','Down'),'NoSignifi'))
ggplot(data=de_hsc, aes(x=avg_log2FC, y=-log10(p_val_adj), colour=threshold)) +
geom_point(alpha=1, size=1.5) +
scale_color_manual(values=c("green", "red", "grey")) +
xlim(c(-4.5, 4.5)) +
geom_vline(xintercept=c(-.25, .25), lty=4,col="black",lwd=0.8) +
geom_hline(yintercept=-log10(0.05), lty=4,col="black",lwd=0.8) +
annotate("text", x=c(-1.2, 1.2), y=1.8, label=c("-1", "1")) +
annotate("text", x=-4, y=1.8, label="-log10(0.05)") +
labs(x="log2(fold change)", y="-log10 (p-value)", title=Name2) +
theme(plot.title=element_text(hjust=0.5), legend.position="right", legend.title=element_blank(
))
GO.title2 <- paste(Name2,"GO", collapse = " ")
KEGG.title2 <- paste(Name2,"KEGG", collapse = " ")
symbols = rownames(de_hsc)
entrez = mapIds(org.Hs.eg.db, symbols, 'ENTREZID', 'SYMBOL')
de_hsc$entrezgene_id = entrez
logFC <- de_hsc$avg_log2FC
names(logFC) <- de_hsc$entrezgene_id
logFC <- logFC[na.exclude(names(logFC))]
logFC <- sort(logFC, decreasing = T)
#geneName2 <- de_hsc$entrezgene_id[abs(de_hsc$avg_log2FC) > 0.25 & de_hsc$p_val_adj<0.05]
geneName2 <- de_hsc$entrezgene_id#[de_hsc$p_val_adj<0.05]
geneName2 <- na.exclude(geneName2)
# de_hsc$logFC = de_hsc$avg_log2FC
# de_hsc$adj.P.Val = de_hsc$p_val_adj
df = as.data.frame(org.Hs.egGO)
go_gene_list = unique(sort(df$gene_id))
length(geneName2)
## [1] 400
ego2 <- enrichGO(gene = geneName2,
universe = go_gene_list,
OrgDb = org.Hs.eg.db,
ont = "BP",
pAdjustMethod = "BH",
pvalueCutoff = 0.01,
qvalueCutoff = 0.05,
readable = TRUE)
head(ego2)
## ID Description
## GO:1903708 GO:1903708 positive regulation of hemopoiesis
## GO:1902107 GO:1902107 positive regulation of leukocyte differentiation
## GO:0045785 GO:0045785 positive regulation of cell adhesion
## GO:0030098 GO:0030098 lymphocyte differentiation
## GO:0045621 GO:0045621 positive regulation of lymphocyte differentiation
## GO:0045582 GO:0045582 positive regulation of T cell differentiation
## GeneRatio BgRatio pvalue p.adjust qvalue
## GO:1903708 23/366 204/18866 1.209466e-11 5.118459e-08 4.075263e-08
## GO:1902107 20/366 159/18866 3.678704e-11 7.784138e-08 6.197648e-08
## GO:0045785 31/366 428/18866 3.353760e-10 4.731038e-07 3.766803e-07
## GO:0030098 28/366 368/18866 8.251124e-10 8.729689e-07 6.950486e-07
## GO:0045621 15/366 105/18866 1.782979e-09 1.509113e-06 1.201540e-06
## GO:0045582 14/366 92/18866 2.676525e-09 1.887842e-06 1.503080e-06
## geneID
## GO:1903708 ANXA1/PTPRC/PRKCA/CD74/RIPK2/HLA-DRB1/HIF1A/NFKBIZ/SYK/RB1/MAPK14/FOS/RHOA/VSIR/RHOH/CA2/TOX/MYB/RHEX/TESPA1/ZBTB16/SOX4/RUNX1
## GO:1902107 ANXA1/PTPRC/PRKCA/CD74/RIPK2/HLA-DRB1/NFKBIZ/SYK/RB1/FOS/RHOA/VSIR/RHOH/CA2/TOX/MYB/TESPA1/ZBTB16/SOX4/RUNX1
## GO:0045785 CD36/ANXA1/CD44/PTPRC/PRKCA/CD74/PIK3R1/RIPK2/PRKCE/NFKBIZ/SYK/ROCK1/DOCK5/TNFSF13B/RHOA/MAP4K4/DISC1/PTPRJ/VSIR/RHOH/SKAP1/DOCK1/MYB/VAV3/ANGPT1/ITGA4/TESPA1/ZBTB16/SOX4/RUNX1/CDK6
## GO:0030098 ANXA1/DOCK10/HDAC9/PTPRC/KLF6/CD74/PIK3R1/PTGER4/RIPK2/NFKBIZ/SYK/GPR183/RHOA/PTPRJ/VSIR/RUNX2/RHOH/LEPR/TOX/MYB/ITGA4/IL18R1/TESPA1/BCL2/ZBTB16/SOX4/RUNX1/CDK6
## GO:0045621 ANXA1/PTPRC/CD74/RIPK2/NFKBIZ/SYK/RHOA/VSIR/RHOH/TOX/MYB/TESPA1/ZBTB16/SOX4/RUNX1
## GO:0045582 ANXA1/PTPRC/CD74/RIPK2/NFKBIZ/SYK/RHOA/VSIR/RHOH/MYB/TESPA1/ZBTB16/SOX4/RUNX1
## Count
## GO:1903708 23
## GO:1902107 20
## GO:0045785 31
## GO:0030098 28
## GO:0045621 15
## GO:0045582 14
length(ego2$ID)
## [1] 151
dotplot(ego2, showCategory=15) + ggtitle("GO enrichment")
gse <- gseGO(geneList=gene_list,
ont ="ALL",
keyType = "SYMBOL",
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
verbose = TRUE,
OrgDb = org.Hs.eg.db,
pAdjustMethod = "none")
require(DOSE)
dotplot(gse, showCategory=10, split=".sign") + facet_grid(.~.sign)
x2 <- pairwise_termsim(gse)
emapplot(x2, showCategory = 10)
cnetplot(gse, categorySize="pvalue", foldChange=gene_list, showCategory = 3)
ridgeplot(gse) + labs(x = "enrichment distribution")
gseaplot(gse, by = "all", title = gse$Description[1], geneSetID = 1)
ids<-bitr(names(original_gene_list), fromType = "SYMBOL", toType = "ENTREZID", OrgDb=org.Hs.eg.db)
dedup_ids = ids[!duplicated(ids[c("SYMBOL")]),]
df2 = stem_cell_markers_1[rownames(stem_cell_markers_1) %in% dedup_ids$SYMBOL,]
df2$Y = dedup_ids$ENTREZID
kegg_gene_list <- df2$avg_log2FC
names(kegg_gene_list) <- df2$Y
kegg_gene_list<-na.omit(kegg_gene_list)
kegg_gene_list = sort(kegg_gene_list, decreasing = TRUE)
kegg_organism = "hsa"
kk2 <- gseKEGG(geneList = kegg_gene_list,
organism = kegg_organism,
minGSSize = 3,
maxGSSize = 800,
pvalueCutoff = 0.05,
pAdjustMethod = "none",
keyType = "ncbi-geneid")
dotplot(kk2, showCategory = 10, title = "Enriched Pathways" , split=".sign") + facet_grid(.~.sign)
x3 <- pairwise_termsim(kk2)
emapplot(x3)
cnetplot(kk2, categorySize="pvalue", foldChange=gene_list)
ridgeplot(kk2) + labs(x = "enrichment distribution")
gseaplot(kk2, by = "all", title = kk2$Description[1], geneSetID = 1)
hsa_path <- pathview(gene.data=kegg_gene_list, pathway.id="hsa05222", species = kegg_organism)
knitr::include_graphics("hsa05222.pathview.png")
knitr::include_graphics("hsa05222.png")